# Fits structural topic models (STM) with K = 20, 25, 30, 35, 45 and 50 topics using `rvln` as the prevalence covariate, plus a K = 40 model with no covariates.

library(tidyverse)
library(arabicStemR)
library(tidytext)
library(topicmodels)
library(lubridate)
library(ggplot2)
library(quanteda)
library(stm)

# Load the preprocessed corpus and pull out the pieces stm() needs.
#
# The workspace is intentionally NOT cleared with rm(list = ls()): that call
# destroys whatever else is in the user's session, yet still leaves attached
# packages and options in place, so it does not give a clean state anyway.
# Run this script in a fresh R session instead.
input_file <- "thawrah.RData"
if (!file.exists(input_file)) {
  stop("Input file not found: ", input_file, call. = FALSE)
}

# load() returns the names of the objects it restored; verify that the
# expected object really came from the file rather than from a stale session.
loaded_objects <- load(input_file)
if (!"thawrah" %in% loaded_objects) {
  stop("'", input_file, "' does not contain an object named 'thawrah'",
       call. = FALSE)
}

docs <- thawrah$documents
vocab <- thawrah$vocab
meta <- thawrah$meta

# Fail early, before any model fitting starts, if a component is absent.
if (is.null(docs) || is.null(vocab) || is.null(meta)) {
  stop("'thawrah' must provide 'documents', 'vocab' and 'meta'",
       call. = FALSE)
}

# topic model -------------------------------------------------------------

# Fit one STM per topic count with `rvln` as the prevalence covariate, and
# save each fit together with `meta` to "k<K>.RData" (k20.RData, k25.RData,
# ...). K = 40 is not part of this sweep; it is fitted separately below
# without covariates.
topic_counts <- c(20, 25, 30, 35, 45, 50)

for (k in topic_counts) {
  start_time <- Sys.time()
  topic_model <- stm(
    documents = docs,
    vocab = vocab,
    K = k,
    init.type = "Spectral",
    prevalence = ~ rvln,
    data = meta,
    seed = 2018
  )
  # Values are not auto-printed inside a loop, so report the elapsed fitting
  # time explicitly.
  print(Sys.time() - start_time)

  # Saved under the object names `topic_model` and `meta`, so every output
  # file exposes the same two objects when loaded.
  save(topic_model, meta, file = paste0("k", k, ".RData"))
}



# No covariates -----------------------------------------------------------
# 40 topics, fitted without a prevalence formula.
t <- Sys.time()
topic_model <- stm(
  documents = docs,
  vocab = vocab,
  K = 40,
  init.type = "Spectral",
  data = meta,
  seed = 2018
)
# Elapsed fitting time (auto-printed at top level).
difftime(Sys.time(), t)

# Store the fit and the metadata under the names `topic_model` and `meta`.
save(list = c("topic_model", "meta"), file = "k40_no_covars.RData")





